* CGK_TAXSIM.do: Impute income taxes and taxable income for Coibion-Gorodnichenko-Kueng using the NBER TAXSIM calculator.
*
* Outline of procedure to impute taxes:
* - construct MEMB-TAXSIM variables and collapse them down to household-level variables (MEMB_TAXSIMinputs.dta),
* - merge FMLY.dta
* - merge MTAB_TAXSIMinputs.dta (annual expenditure items) and TaxYears.dta
* - construct TAXSIM variables
* - 1st run of TAXSIM to determine AGI and itemization status
* - impose AGI rules using AGI and itemization status from 1st run
* - 2nd run of TAXSIM
* - 3rd run of TAXSIM to calculate statutory marginal tax rate (TAXSIM plan 51)
* - save as TAXSIM_output.dta
*
* Coibion-Gorodnichenko-Kueng; Initial version: March 2012; this version: May 2017


    *===================
    * Construct tax year
    *===================

* Tax year of each household-interview = calendar year of its latest monthly date.
use NEWIDunique intno date using "$data/CGK_Expenditures.dta", clear
collapse (max) date, by(NEWIDunique intno)   // keep the last monthly date per household-interview
generate year = year(dofm(date))             // monthly date -> daily date -> calendar year
label variable year "tax year"
drop date
$savetype "$data/stata/TaxYears.dta", replace


    *====================================================
    * Estimate annual expenditures for some TAXSIM inputs
    *====================================================

* Build household-level annual expenditure inputs for TAXSIM:
* sum each expenditure item over the records available for a household and
* scale the total to a 12-month basis using the number of records summed.
use NEWIDunique intdate rentaspay mealaspay rentpaid proptax occupexp mortgageint ///
  babysit elderly healthexpD healthexpS healthins using "$data/CGK_Expenditures.dta", clear
replace proptax=0 if intdate<m(1994m1) // there is a sample break in this variable that cannot be corrected
gen numbmonth = 1 // counter; its sum below is the number of records per household (presumably one per month -- confirm)
gen healthexp = healthexpD + healthexpS + healthins
drop healthexpD healthexpS healthins

* Sentinel variables AAA and zzz bracket the alphabetized variable list so that,
* once NEWIDunique is moved to the front, the range AAA-zzz spans all other variables.
gen AAA=.
gen zzz=.
aorder
order NEWIDunique
collapse (sum) AAA-zzz, by(NEWIDunique)

foreach var in rentaspay mealaspay rentpaid proptax occupexp mortgageint babysit elderly healthexp {
  // Annualize: `var' is a total over numbmonth months, so the 12-month estimate is
  // total*12/numbmonth. (The previous factor numbmonth/12 scaled partial-year totals
  // DOWN instead of up; both agree only when numbmonth==12.)
  replace `var'=`var'*12/numbmonth // estimated annual expenditures
}
drop numbmonth AAA zzz
compress
sort NEWIDunique
$savetype "$data/stata/MTAB_TAXSIMinputs.dta", replace



    *============================================
    * Construct TAXSIM inputs from MEMB variables 
    *============================================

* Construct member-level TAXSIM variables, collapse them to the household-interview
* level, and save them as MEMB_TAXSIMinputs.dta
use SALARYXrb NONFARMXrb FARMINCXrb SOCRRXrb SSIX ANFEDTX ANSLTX CU_CODE AGE IN_COLL NEWIDunique intno MEMBNO ///
  using "$data/stata/MEMBindividual.dta", clear

* Construct indicator>0 if member paid taxes (or received refunds)
replace ANFEDTX =1 if ANFEDTX ==.| ANFEDTX <0 // Replace with an arbitrary positive number to indicate that
replace ANSLTX  =1 if ANSLTX  ==.| ANSLTX <0  // member paid taxes.
gen tax = ANFEDTX +ANSLTX
capture noisily: drop ANFEDTX* ANSLTX*

* Approximation of gross income
* (full variable names are used so the code does not depend on variable abbreviation being enabled)
gen grossinc = SALARYXrb +NONFARMXrb +FARMINCXrb +SOCRRXrb

* Construct age of reference person and spouse
* (Note: We first need to generate a unique reference person since CU_CODE=1 and CU_CODE==2 are not unique)
tempfile temp_age // scratch file; removed automatically by Stata when the do-file ends
preserve
keep NEWIDunique CU_CODE AGE
gsort NEWIDunique CU_CODE -AGE // use older member if CU_CODE is not unique
duplicates drop NEWIDunique CU_CODE, force
foreach i of numlist 1 2 0 6 7 5 8 9 {
 gen age`i' = AGE if CU_CODE==`i'
 replace age`i'=0 if age`i'==.
}
keep NEWIDunique age*
collapse (sum) age*, by(NEWIDunique)
foreach i of numlist 0 6 7 5 8 9 { // a few households do not have a reference person or a spouse, i.e. CU_CODE is neither 1 or 2.
 replace age2=age`i' if age1==0 & age2==0 & age`i'!=0 // first non-zero age in this order wins; later codes no longer satisfy age2==0
}
keep NEWIDunique age1 age2
sort NEWIDunique
save "`temp_age'"
restore
merge m:1 NEWIDunique using "`temp_age'", nogen

* Qualified children (for 2009)
gen qualchld = (grossinc<3650)                                     /// approximate gross income test instead of infeasible self-support test
             & (CU_CODE==3 | CU_CODE==4 | CU_CODE==6 | CU_CODE==7) /// relationship test
             & (AGE<age1 | AGE<age2)                               /// first age test
             & (AGE<19 | (AGE<24 & IN_COLL==1) )                   //  second age test (either under age 19 or under 24 and a college student)

* Qualified relatives (for 2009)
gen qualrel = (grossinc<3650)           /// approximate gross income test instead of infeasible self-support test
            & (qualchld!=1)             /// not a qualifying child
            & (CU_CODE!=1 & CU_CODE!=2) //  not CU head of household nor spouse


* Calculate TAXSIM input variables (depx, agex, depchild, pwages, swages):

* Number of (qualified) dependents:
gen depx = qualrel+qualchld

* Number of CU taxpayers over age 65:
* (Stata treats missing as larger than any number, so AGE>65 alone would count
*  members with missing AGE as elderly; exclude them explicitly.)
gen agex = (AGE>65) & !missing(AGE) & (tax>0)

* Number of children under age 17 (for child tax credit in TAXSIM):
gen depchild = (grossinc<3650) ///
             & (CU_CODE==3 | CU_CODE==4 | CU_CODE==6| CU_CODE==7) ///
             & (AGE<17)

* -collapse (sum)- below treats missing values as zero. To keep track of missing
* components, missing values are first replaced by a large negative sentinel
* (-999999999); household sums below -100000000 are set back to missing afterwards.

* Wage and salary income (including self-employment) of primary taxpayer
gen pwages = SALARYXrb +NONFARMXrb +FARMINCXrb if CU_CODE==1
replace pwages = -999999999 if pwages==. & CU_CODE==1

* Wage and salary income of spouse (including self-employment)
*  Note: I include all other household income in the secondary earner income. If the primary household is single
*        or head of household, we will add any secondary income to the primary taxpayer's income below.
gen swages = SALARYXrb +NONFARMXrb +FARMINCXrb if CU_CODE!=1
replace swages = -999999999 if swages==. & CU_CODE!=1
replace SOCRRXrb = -999999999 if SOCRRXrb==.
replace SSIX = -999999999 if SSIX==.

collapse (sum) pwages swages SOCRRXrb SSIX depx agex depchild, by(NEWIDunique intno)
replace pwages=. if pwages<-100000000
replace swages=. if swages<-100000000
replace SOCRRXrb=. if SOCRRXrb<-100000000
replace SSIX =. if SSIX<-100000000
sort NEWIDunique intno
$savetype "$data/stata/MEMB_TAXSIMinputs.dta", replace



    *================
    * Merge data sets
    *================

* Start from the household-level FMLY variables (each variable listed once)
use NEWIDunique intno QINTRVYR ///
    UNEMPLXrb COMPENSXrb WELFAREXrb INTEARNX FININCXrb PENSIONXrb INCLOSSArb INCLOSSBrb ///
    OTHRINCXrb FOODSMPXrb INCCONTXrb FEDTAXX SLOCTAXX TAXPROPX FEDRFNDX SLRFUNDX MISCTAXX OTHRFNDX ///
    SALEINCX SETLINSX LUMPSUMX SSOVERPX INSRFNDX COLLEXPX ALIMOX PTAXRFDX ///
    CNTRCHRX CNTEDORX CNTRELGX FINDRETX STATE MARITAL1 ///
    using "$data/stata/FMLY.dta", clear

* Add the household-interview level MEMB inputs; only matched observations are kept
merge 1:1 NEWIDunique intno using "$data/stata/MEMB_TAXSIMinputs.dta", keep(match) nogenerate

* Add the household-level MTAB inputs; only matched observations are kept.
* Note: This imposes the sample selection on the expenditure variables.
merge m:1 NEWIDunique using "$data/stata/MTAB_TAXSIMinputs.dta", keep(match) nogenerate

* Add the tax year; only matched observations are kept
merge 1:1 NEWIDunique intno using "$data/stata/TaxYears.dta", keepusing(year) keep(match) nogenerate



    *=============================================================================================
    * Mean impute smaller items that are not components of our definition of 'income before taxes'
    *=============================================================================================

* The following variables are discontinued and we currently don't have a good solution to get around this.
* Hence we set them to zero for the moment.
replace FOODSMPXrb=0 if FOODSMPXrb==. & year>=1982 & year<=1984
foreach var in CNTEDORX CNTRCHRX CNTRELGX COLLEXPX ALIMOX {
 replace `var'=0 if `var'==. & year>=2001
}

* Use mean imputation for remaining missing values:
* regress each variable on a full set of year indicators (no constant), using only
* non-zero observations, and fill missing values with the fitted value for that year.
* ibn.year (no base level) is required here: with i.year one year is treated as the
* base level and omitted even under -nocons-, so its fitted value would be 0 instead
* of that year's mean.
foreach var in SALEINCX SETLINSX LUMPSUMX SSOVERPX INSRFNDX FOODSMPXrb TAXPROPX ///
               CNTEDORX CNTRCHRX CNTRELGX COLLEXPX ALIMOX SLRFUNDX PTAXRFDX {
 di "`var'"
 qui reg `var' ibn.year if `var'!=0, nocons
 predict yhat
 replace `var'=yhat if `var'==.
 drop yhat
}



    *============================================
    * Construct the 21 TAXSIM input variables
    *============================================


* 1. tax year
*    -year- is already in memory (constructed from MTAB files)

* 2. state (SOI codes)
do "$home/do-files/TAXSIMstateCEXtoSOI.do" // mapping of state codes from CEX to SOI definition.
generate state = stateSOI

* 3. marital status
*    2 = married (assumed to file jointly), 3 = not married with dependents (head of household),
*    1 = single filer (everyone else)
generate mstat = cond(MARITAL1==1, 2, cond(depx>0, 3, 1))

* 4. number of dependents
*    -depx- is already in memory (constructed from MEMB files)

* 5. number of taxpayers age>65 (0,1, or 2)
*    -agex- is already in memory (constructed from MEMB files)

* 6./7. wage and salary of taxpayer and of spouse (incl. self-employment).
*    -pwages- and -swages- are already in memory (constructed from MEMB files).
*    Note: Will be dropped by TAXSIM if negative.
*    For filers who are not married, all family earnings are assigned to the primary taxpayer.
replace pwages = pwages + swages if mstat!=2 // total family income if not married (e.g. head of household for tax purposes)
replace swages = 0               if mstat!=2 // no other income if not married

* 8. dividends. Note: Qualified dividends only from 2003 on.
generate dividends = FININCXrb // dividends, royalties, estates, trusts. Note: We have to assume that those are ordinary, not qualified dividends.

* 9. interest and other property income
*    Terms, in order: income items, then adjustments not reported elsewhere (subtracted),
*    then refund income that is taxable only for itemizers.
generate otherprop = INTEARNX   /// interest from savings accounts + bonds. Note: We need to assume that interest is fully taxable.
                   + OTHRINCXrb /// other income (scholarships, stipends,...)
                   + INCLOSSArb /// income from roomers + boarders
                   + INCLOSSBrb /// income from other rental units
                   + INCCONTXrb /// income from alimony + outside contributions. Note: For time consistency, we combine INCCONTX=ALIOTHX+CHDOTHX.
                   + SALEINCX   /// sale of goods
                   + SETLINSX   /// insurance settlement receipts
                   + LUMPSUMX   /// estates, trusts, royalties, alimony, lumpsum payments,...
                   + SSOVERPX   /// social security refunds received from overpayment. Note: There might be a tax penalty associated with this.
                   + INSRFNDX   /// insurance policy refunds received
                   + rentaspay  /// rent as pay (constructed from MTAB files)
                   + mealaspay  /// meal as pay (constructed from MTAB files)
                   - FINDRETX   /// contribution to IRA, Keogh
                   - COLLEXPX   /// student support paid
                   - ALIMOX     /// alimony paid
                   + SLRFUNDX   /// state and local income tax refund received
                   + PTAXRFDX   //  property tax refund received
* Note: SLRFUNDX and PTAXRFDX are taxable income only for itemizers. They are excluded from
*       income in the second run of TAXSIM if the standard deduction was chosen.

* 10. taxable pensions
capture drop pensions          // Note: pensions is an (unrelated) expenditure item on MTAB
generate pensions = PENSIONXrb // pensions, private annuities, annuities from IRA, Keogh

* 11. gross social security income
generate gssi = SOCRRXrb // social security and railroad retirement income. Note: SOCRRX combines FRRETIRX.

* 12. other non-taxable transfer income
generate transfers = COMPENSXrb /// workers' compensation + veterans' benefits
                   + WELFAREXrb /// public assistance + welfare
                   + FOODSMPXrb /// food stamps and electronic benefits. Note: FOODSMPX combines JFDSTMPA and is missing 1982-86.
                   + SSIX       //  supplemental security income

* 13. rent paid
*    -rentpaid- is already in memory (constructed from MTAB files)

* 14. property and other taxes (part of itemized deductions)
*    property taxes paid (constructed from MTAB files) plus personal property taxes for vehicles.
*    Note: TAXPROPX seems to be inconsistent over time (i.e. 1990s).
replace proptax = proptax + TAXPROPX

* 15. additional personal itemized deductions (except mortgage, state and property tax)
*    Job expenses ('occupexp') and medical and dental expenses ('healthexp'), both constructed
*    from the MTAB files, are added only after the 1st TAXSIM run, once the AGI rules are applied.
generate otheritem = CNTRCHRX /// charitable contribution
                   + CNTEDORX /// educational contribution
                   + CNTRELGX //  religious contribution

* 16. child care expenses
generate childcare = babysit /// babysitting and day-care centers, nursery, and preschools (constructed from MTAB files)
                   + elderly //  care for elderly, invalids, handicapped, and adult day care center expense (constructed from MTAB files)

* 17. unemployment compensation
generate ui = UNEMPLXrb

* 18. number of dependents under age 17
*    -depchild- is already in memory (constructed from MEMB files)

* 19. mortgage interest
*    interest from mortgage, home equity loan, home equity line of credit and prepayment penalty
*    charges from (i) owned dwellings and (ii) owned vacation homes (constructed from MTAB files)
generate mortgage = mortgageint

* 20./21. short-term and long-term capital gains: insufficient information in CE survey
generate stcg = 0
generate ltcg = 0


* Imposing TAXSIM's restrictions to prevent a crash
replace mstat    = 1 if mstat==.
replace depx     = min(depx, 15)        // at most 15 dependents
replace depchild = min(depchild, depx)  // children under 17 cannot exceed the number of dependents

* Keep identifiers, the TAXSIM inputs, and the items needed for the AGI adjustments after the 1st run
keep NEWIDunique intno year ///
     state mstat depx agex pwages swages dividends otherprop pensions gssi transfers ///
     rentpaid proptax otheritem childcare ui depchild mortgage stcg ltcg ///
     occupexp healthexp SLRFUNDX PTAXRFDX

* check data (disabled)
*tabmiss
*sum
compress
sort NEWIDunique
$savetype "$data/stata/TAXSIM_input.dta", replace



    *==============
    * Impute Taxes
    *==============

	* 1st run of TAXSIM to determine AGI and itemization status:

use "$data/stata/TAXSIM_input.dta", clear
capture noisily: net install taxsim9 // best effort: a failed install is reported but does not stop the do-file
di c(current_time)
taxsim9, replace full
di c(current_time)
do "$home/do-files/TAXSIMrename.do"


	* Use AGI from 1st run to adjust for AGI rules and itemization status:

* healthcare expenses: only the part exceeding a year-specific share of AGI is deductible
gen AGItest82=0.03 *fedAGI // AGI rule for 1960-1982 Note: 'fedAGI' is TAXSIM variable v10
gen AGItest86=0.05 *fedAGI // AGI rule for 1983-1986
gen AGItest87=0.075*fedAGI // AGI rule from 1987 on
gen temp=.
replace temp =healthexp-AGItest82 if healthexp> AGItest82 & (year<=1982)
replace temp =0                   if healthexp<=AGItest82 & (year<=1982)
replace temp =healthexp-AGItest86 if healthexp> AGItest86 & (year> 1982 & year<=1986)
replace temp =0                   if healthexp<=AGItest86 & (year> 1982 & year<=1986)
replace temp =healthexp-AGItest87 if healthexp> AGItest87 & (year> 1986)
replace temp =0                   if healthexp<=AGItest87 & (year> 1986)
replace healthexp=temp
drop temp

* job expenses: only the part exceeding 2% of AGI is deductible
* NOTE(review): the 2% floor is applied in every tax year; confirm this is intended for early sample years.
gen AGItest=0.02*fedAGI
gen temp=.
replace temp =occupexp-AGItest if occupexp> AGItest
replace temp =0                if occupexp<=AGItest
replace occupexp=temp // FIX: store the floor-adjusted value (was 'replace temp =occupexp', which discarded the adjustment)
drop AGItest* temp
replace otheritem =otheritem+occupexp+healthexp
drop taxsimid

* state and local tax and property tax refund income: not taxable if deductions were not itemized
replace otherprop=otherprop-(SLRFUNDX+PTAXRFDX) if dedallowed==0 // Note: 'dedallowed' is TAXSIM variable v17

* Keep identifiers and the adjusted TAXSIM inputs. healthexp, SLRFUNDX and PTAXRFDX are dropped
* here: they have already been folded into otheritem and otherprop above.
keep NEWIDunique intno year ///
 state mstat depx agex pwages swages dividends otherprop pensions gssi transfers ///
 rentpaid proptax otheritem childcare ui depchild mortgage stcg ltcg occupexp

compress
sort NEWIDunique
$savetype "$data/stata/TAXSIM_firstrun.dta", replace


    * 2nd run of TAXSIM to calculate taxes

* Run TAXSIM on the inputs adjusted for AGI rules and itemization status
use "$data/stata/TAXSIM_firstrun.dta", clear
display c(current_time)
taxsim9, replace full
display c(current_time)
do "$home/do-files/TAXSIMrename.do"
drop taxsimid
label data "Income Taxes from TAXSIM"
compress
sort NEWIDunique
$savetype "$data/stata/TAXSIM.dta", replace

* Save only the identifiers and the variables of interest (fiitax, siitax)
use NEWIDunique intno fiitax siitax using "$data/stata/TAXSIM.dta", clear
$savetype "$data/CGK_TAXSIMtaxes.dta", replace
